#---------------------------------------------------------------------------------------------------------------------------#
#---Do Stereotypes Explain Discrimination Against Minority Candidates or Discrimination in Favor of Majority Candidates? ---#
#------------------------------------------- British Journal of Political Science ------------------------------------------#
#----------------------------------------------------- Lea Portmann --------------------------------------------------------#
#---------------------------------------------------- 19 October 2020 ------------------------------------------------------#
#------------------------------------------------ Latent Profile Analysis --------------------------------------------------#
#---------------------------------------------------------------------------------------------------------------------------#

library(dplyr)
library(plyr)
library(Hmisc)
library(ggplot2)
library(mclust)
library(xtable)
library(viridis)
library(reshape2)
library(tidyr)
library(dplyr)

rm(list=ls())
setwd(".../...")
load("dat")

# ----------------------------------------------------------------------------------- Data preparation

dat[,8:25] <- apply(dat[,8:25], 2, function(x) as.numeric(as.character(x)))

# ----------------------------------------------------------------------------------- LPA

# --------- Prepare data

dat_lpa <- dat %>% 
dplyr::select(c("EXSTYP_IT_1", "EXSTYP_IT_2", "EXSTYP_IT_3", 
"EXSTYP_IT_4", "EXSTYP_IT_5", "EXSTYP_IT_6", "EXSTYP_IT_7", "EXSTYP_IT_8", 
"EXSTYP_IT_9", "EXSTYP_AL_1", "EXSTYP_AL_2", "EXSTYP_AL_3", "EXSTYP_AL_4", 
"EXSTYP_AL_5", "EXSTYP_AL_6", "EXSTYP_AL_7", "EXSTYP_AL_8", "EXSTYP_AL_9")) %>%
na.omit() 

# --------- Select most suitable model/number of groups

# --------- Table 13 online appendix: BIC for models with profiles ranging from 1 to 9

BIC <- mclustBIC(dat_lpa)
plot(BIC)
summary(BIC)
dat_BIC_T1 <- as.matrix(summary(BIC))
dat_BIC_T1 <- as.data.frame(dat_BIC_T1)
names(dat_BIC_T1)[1] <- "BIC"
dat_BIC_T1 <- tibble::rownames_to_column(dat_BIC_T1, "Model")
dat_BIC_T1 <- dat_BIC_T1 %>% separate(Model, c("Model", "Groups"), "([\\,\\?\\:])") 
print(xtable(dat_BIC_T1, caption="Best model fits according to BIC"), include.rownames=FALSE)

#EEV 3 best model fit (largest BIC in mclust) 

# --------- Model

m1 <- Mclust(dat_lpa, modelNames = "EEV", G = 3, x = BIC)
summary(m1)

# --------- Calculate percentages

round(895/(895  + 451 + 41), digits = 2)
round(451/(895  + 451 + 41), digits = 2)
round(41/(895  + 451 + 41), digits = 2)

# ---------- Figure 4: Evaluation of candidates by profile (means and confidence intervals), from LPA

#Prepare data
means <- data.frame(m1$parameters$mean, 
stringsAsFactors = FALSE) %>%
tibble::rownames_to_column() %>%
melt(id.vars = "rowname", variable.name = "Profile", value.name = "Mean") %>%
dplyr::mutate(Mean = round(Mean, 2))
names(means)[names(means) == "rowname"] <- "Rating"

means %>%
ggplot(aes(Rating, Mean, group = Profile, color = Profile)) +
geom_point(size = 2.25) +
geom_line(size = 1.25)

#Prepare bootstrap
boot <- MclustBootstrap(m1, nboot = 999, type = "bs")
boot.ci <- summary(boot, what="ci")

boots_dat <- as.data.frame(boot.ci$mean)
boots_dat$id <- rownames(boots_dat)
boots_dat_long <- gather(boots_dat, rating, value, -id)
boots_dat_long$rating <- as.character(boots_dat_long$rating)
boots_dat_long <- boots_dat_long %>% separate(rating , c("Rating", "Profile"), "([\\.\\?\\:])") 
boots_dat <- spread(boots_dat_long, id, value)

#Prepare means
means$Profile <- substr(means$Profile, 2, 2)

#Merge
dat_lpa_f1 <- merge(boots_dat, means, by=c("Rating", "Profile"))
dat_lpa_f1$Name <- substr(dat_lpa_f1$Rating, 8, 8 + 2 - 1)
dat_lpa_f1$Characteristic <- ifelse(dat_lpa_f1$Rating=="EXSTYP_AL_1" | dat_lpa_f1$Rating=="EXSTYP_IT_1", "cold/warm", 
ifelse(dat_lpa_f1$Rating=="EXSTYP_AL_2" | dat_lpa_f1$Rating=="EXSTYP_IT_2", "dishonest/trustworthy", 
ifelse(dat_lpa_f1$Rating=="EXSTYP_AL_3" | dat_lpa_f1$Rating=="EXSTYP_IT_3", "selfish/willing to help", 
ifelse(dat_lpa_f1$Rating=="EXSTYP_AL_4" | dat_lpa_f1$Rating=="EXSTYP_IT_4", "incapable/skilled", 
ifelse(dat_lpa_f1$Rating=="EXSTYP_AL_5" | dat_lpa_f1$Rating=="EXSTYP_IT_5", "incompetent/competent", 
ifelse(dat_lpa_f1$Rating=="EXSTYP_AL_6" | dat_lpa_f1$Rating=="EXSTYP_IT_6", "despised/respected", 
ifelse(dat_lpa_f1$Rating=="EXSTYP_AL_7" | dat_lpa_f1$Rating=="EXSTYP_IT_7", "little/extensive knowledge of Italian politics", 
ifelse(dat_lpa_f1$Rating=="EXSTYP_AL_8" | dat_lpa_f1$Rating=="EXSTYP_IT_8", "does not/does promote Italian interests", 
ifelse(dat_lpa_f1$Rating=="EXSTYP_AL_9" | dat_lpa_f1$Rating=="EXSTYP_IT_9", "does not/does appreciate Italian culture", NA)))))))))

#Prepare data frame

dat_lpa_f1$Characteristic <- factor(dat_lpa_f1$Characteristic, 
levels = c("does not/does appreciate Italian culture", 
"does not/does promote Italian interests", "little/extensive knowledge of Italian politics", 
"despised/respected", "incompetent/competent", "incapable/skilled", 
"selfish/willing to help", "dishonest/trustworthy", "cold/warm"))
table(dat_lpa_f1$Characteristic)

names(dat_lpa_f1)[3] <- "lci"
names(dat_lpa_f1)[4] <- "hci"

dat_lpa_f1$Profile <- factor(dat_lpa_f1$Profile, 
levels = c(1, 2, 3),
labels = c("Profile A", "Profile B", "Profile C"))

dat_lpa_f1$Name <- factor(dat_lpa_f1$Name, 
labels = c("Algerian", "Italian"))

table(dat_lpa_f1$Characteristic)

#Plot
lpa_f1 <- ggplot(dat_lpa_f1, aes(dat_lpa_f1, group=Name)) + 
geom_errorbar(aes(x = Characteristic, ymin=lci, ymax = hci), 
width=.4, lwd=0.5, position=position_dodge(.5)) +
geom_point(aes(x=Characteristic, y=Mean, shape=Name), size=1.8, position=position_dodge(.5)) +
scale_y_continuous(limits=c(-1.5, 2)) +
labs(x="", y="Evaluation") + theme_bw() +
geom_hline(yintercept=0, linetype="dashed", 
color = "black", size=0.5) +
coord_flip() +
scale_colour_grey(guide=FALSE) + 
facet_wrap(~ Profile);lpa_f1

dat_text <- data.frame(
label = c("n=895", "n=451", "n=41"),
Profile = c("Profile A", "Profile B", "Profile C"),
x     = c(1, 1, 1),
y     = c(1.5, 1.5, 1.5)
)

lpa_f2 <- lpa_f1 + geom_text(
data    = dat_text,
mapping = aes(x = x, y = y, label = label), 
size = 3.5, inherit.aes = FALSE
)
lpa_f2

